#红楼梦人物出现次数
import jieba
# Tokens that are dropped from the tally before ranking.
excludes = {"什么","一个","我们","那里","如今","你们","说道","起来",
            "姑娘","这里","出来","他们","众人","奶奶","自己","一面",
            "太太","只见","怎么","两个","没有","不是","不知","这个",
            "知道","听见","这样","进来","告诉","东西","咱们","就是",
            "回来","大家","只是","老爷","只得","丫头","这些","不敢",
            "出去","所以","不过"}

# Alternate spellings mapped to the single name they are counted under.
aliases = {
    '宝玉': '贾宝玉',
    '凤姐': '王熙凤',
    '老太太': '贾母',
    '宝钗': '薛宝钗',
    '黛玉': '林黛玉',
    '二太太': '王夫人',
    '琏二爷': '贾琏',
    '平姐姐': '平儿',
    '薛夫人': '薛姨妈',
}

# Read the whole text up front; the context manager guarantees the file
# handle is closed even if reading fails.
with open("./素材/hlm.txt","r",encoding='utf-8') as f:
    txt = f.read()
words = jieba.lcut(txt)

# Tally every token under its merged name.
counts = {}
for word in words:
    # Single-character tokens are ignored entirely.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    # Skip excluded tokens while counting rather than deleting them
    # afterwards, so an excluded token that never occurs in the text
    # cannot raise KeyError.
    if rword in excludes:
        continue
    counts[rword] = counts.get(rword, 0) + 1

# Rank by count, highest first; list.sort is stable, so ties keep the
# order in which the tokens were first seen.
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)

# Slice rather than index over range(10) so a text that yields fewer than
# ten distinct tokens prints what it has instead of raising IndexError.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word,count))